/*==============================================================================
FILE NAME: Process_TitleV.do
INPUTS: TitleV_Permits.dta
OUTPUTS: TitleV_Permits_Clean.dta
==============================================================================*/

// Keep long output from pausing at the -more- prompt
set more off

/* Stand-alone setup: to run this file on its own, put your username in the
   condition below and fill in the root path. The load step further down reads
   from the processed_data global, so it has to be defined either here or
   before this file is run. */
if "`c(username)'" == "" {
    // insert root path
    global rootdir ""
    // Global path used by the replication package
    global processed_data "$rootdir/rough_work/processed_data"
}

// Read the raw Title V permits file, replacing whatever data is in memory
use "$processed_data/TitleV_Permits.dta", clear

// Remove columns that are not kept in the cleaned file: NAICS, site and
// location descriptors, and the latitude/longitude/UTM coordinate fields
drop NAICS LocationDescription PermitAreaName SiteName Address ///
    LAT_DEG_NUM LAT_MN_NUM LAT_SEC_NUM ///
    LONG_DEG_NUM LONG_MN_NUM LONG_SEC_NUM ///
    UTM_ZONE_NUM UTM_EAST_MTR_NUM UTM_NORTH_MTR_NUM

// Build the 2-digit SIC prefix and store it as a numeric variable named SIC.
//
// When SIC is held as a number, any leading zero has been lost (a code written
// 0724 is stored as 724), so converting it straight to text and taking the
// first two characters would give "72" instead of "07". Numeric codes are
// therefore zero-padded to four digits before the prefix is taken. A numeric
// missing value becomes ".", which destring turns back into missing.
// NOTE(review): assumes numeric SIC values are full 4-digit codes - confirm
// against the raw data.
capture confirm numeric variable SIC
if _rc == 0 {
    gen SIC_2digit = substr(string(SIC, "%04.0f"), 1, 2)
}
else {
    // SIC is already a string: take the prefix of the text exactly as stored
    gen SIC_2digit = substr(SIC,1,2)
}
drop SIC
rename SIC_2digit SIC
destring SIC, replace

// Store the TCEQ region as text
tostring TCEQRegion, replace

// Give the zip code column its standard name and store it as a number
rename Zip ZipCode
destring ZipCode, replace

// Append _T5 to the location and industry columns so they are identifiable
// as Title V fields
local t5_columns City ZipCode County TCEQRegion SIC
foreach col of local t5_columns {
    rename `col' `col'_T5
}

// Store the permit identifier as text under the name PN
tostring Permit, replace
rename Permit PN

// Give the permit status, end date, and type columns descriptive _T5 names
rename (PermitStatus PermitEndDate PermitType) ///
       (Permit_Status_T5 Permit_End_Date_T5 Permit_Type_T5)

// Flag every row in this file as a Title V permit
generate TitleV = 1

// Summarize rows that are identical on every variable, then keep one copy of each
duplicates report
duplicates drop

// Write the cleaned Title V permits dataset, overwriting any existing file
local clean_file "$processed_data/TitleV_Permits_Clean.dta"
save "`clean_file'", replace